package ru.sut.excel.parse.util;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;

/**
 * Utility for splitting lines of text into a set of distinct word tokens.
 *
 * <p>The class keeps no state between calls: every invocation of
 * {@link #getTokens(ArrayList, Set)} builds and returns its own result set.
 *
 * @author pol
 */
public class TokenizeText {

    /** Characters that separate tokens: space, period, comma, underscore and double quote. */
    private static final String DELIMITERS = " .,_\"";

    /**
     * Splits every line of {@code text} into tokens and collects the distinct
     * tokens that are longer than one character.
     *
     * <p>As a diagnostic side effect, for each suffix in {@code tokens} a header
     * line is written to standard output, followed by every collected token that
     * ends with that suffix and starts with an upper-case character.
     *
     * @param text   lines to tokenize; {@code null} is treated as empty and
     *               {@code null} lines are skipped
     * @param tokens suffixes used only for the diagnostic output; {@code null}
     *               (or a {@code null} entry) is skipped
     * @return a new mutable set with the distinct tokens of length greater than one;
     *         never {@code null}
     */
    public static Set<String> getTokens(ArrayList<String> text, Set<String> tokens) {
        // Local result instead of a shared static field, so calls cannot
        // interfere with each other and nothing is retained after returning.
        Set<String> setOfTokens = collectTokens(text);
        if (tokens != null) {
            printCapitalizedMatches(setOfTokens, tokens);
        }
        return setOfTokens;
    }

    /** Tokenizes each non-null line and gathers the distinct tokens longer than one character. */
    private static Set<String> collectTokens(ArrayList<String> text) {
        Set<String> collected = new HashSet<String>();
        if (text == null) {
            return collected;
        }
        for (String line : text) {
            if (line == null) {
                continue;
            }
            StringTokenizer tokenizer = new StringTokenizer(line, DELIMITERS);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                // Single-character tokens are deliberately dropped.
                if (token.length() > 1) {
                    collected.add(token);
                }
            }
        }
        return collected;
    }

    /** Prints, per suffix, the collected tokens that end with it and start with an upper-case character. */
    private static void printCapitalizedMatches(Set<String> collected, Set<String> suffixes) {
        for (String end : suffixes) {
            if (end == null) {
                continue;
            }
            System.out.println("End = \t\t " + end);
            for (String candidate : collected) {
                // charAt(0) is safe: only tokens with length > 1 are collected.
                if (candidate.endsWith(end) && Character.isUpperCase(candidate.charAt(0))) {
                    System.out.println(candidate);
                }
            }
        }
    }
}
